# Required Packages
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import pandas_datareader.data as pdr
import math
from datetime import datetime
from datetime import timedelta
import matplotlib.dates as mdates
# Progress Bar
from IPython.core.display import Image, display
import progressbar
# Plots
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
# sns setting
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
# plt setting
sns.set_style('whitegrid')
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
In this article, the Communication Services Sector from Yahoo! Finance is used, and we analyze the current top tech companies' stock prices.
| Symbol | Name | Symbol | Name | Symbol | Name |
|---|---|---|---|---|---|
| AAPL | Apple Inc. | NVDA | NVIDIA Corporation | INTU | Intuit Inc. |
| MSFT | Microsoft Corporation | ACN | Accenture plc | VMW | VMware, Inc. |
| TSM | Taiwan Semiconductor Manufacturing Company Limited | AVGO | Broadcom Inc. | AMAT | Applied Materials, Inc. |
| INTC | Intel Corporation | IBM | International Business Machines Corporation | MU | Micron Technology, Inc. |
| CSCO | Cisco Systems, Inc. | ASML | ASML Holding N.V. | NOW | ServiceNow, Inc. |
| ORCL | Oracle Corporation | TXN | Texas Instruments Incorporated | UBER | Uber Technologies, Inc. |
| SAP | SAP SE | QCOM | QUALCOMM Incorporated | AMD | Advanced Micro Devices, Inc. |
| ADBE | Adobe Inc. | FIS | Fidelity National Information Services, Inc. | | |
| CRM | salesforce.com, inc. | SNE | Sony Corporation | | |
# Ticker symbol -> company name. Insertion order is the order used for every
# download, table and plot below.
Stock_Dic = {
    'AAPL': 'Apple Inc.',
    'MSFT': 'Microsoft Corporation',
    'TSM': 'Taiwan Semiconductor Manufacturing Company Limited',
    'INTC': 'Intel Corporation',
    'CSCO': 'Cisco Systems, Inc.',
    'ORCL': 'Oracle Corporation',
    'SAP': 'SAP SE',
    'ADBE': 'Adobe Inc.',
    'CRM': 'salesforce.com, inc.',
    'NVDA': 'NVIDIA Corporation',
    'ACN': 'Accenture plc',
    'AVGO': 'Broadcom Inc.',
    'IBM': 'International Business Machines Corporation',
    'ASML': 'ASML Holding N.V.',
    'TXN': 'Texas Instruments Incorporated',
    'QCOM': 'QUALCOMM Incorporated',
    'FIS': 'Fidelity National Information Services, Inc.',
    'SNE': 'Sony Corporation',
    'INTU': 'Intuit Inc.',
    'VMW': 'VMware, Inc.',
    'AMAT': 'Applied Materials, Inc.',
    'MU': 'Micron Technology, Inc.',
    'NOW': 'ServiceNow, Inc.',
    'UBER': 'Uber Technologies, Inc.',
    'AMD': 'Advanced Micro Devices, Inc.',
}
# The ticker symbols alone, in the same order as the dictionary keys.
Stock_list = list(Stock_Dic)
The data is collected from the beginning of the year five years ago until today.
# Sampling window: from 1 January five calendar years back, up to now.
end = datetime.today()
start = datetime(end.year - 5, 1, 1)
# Alternative (exactly five years back from today):
# start = datetime(end.year - 5, end.month, end.day)
def Timeline_plot(start, end, width = 16):
    """Draw a one-row timeline with a red marker at the start of every month.

    Parameters
    ----------
    start, end : datetime
        Ends of the grey baseline. Markers are placed on each month start
        from `start` up to the first day of the month containing `end`.
    width : number
        Figure width in inches (the height is fixed at 1).
    """
    fig, ax = plt.subplots(figsize=(width, 1))
    month_starts = pd.date_range(start, datetime(end.year, end.month, 1), freq='MS')

    # Baseline spanning the whole period.
    ax.plot((start, end), (0, 0), 'k', alpha=.5)

    # A tick every second month, labelled like "Jan 2020".
    x_axis = ax.get_xaxis()
    x_axis.set_major_locator(mdates.MonthLocator(interval=2))
    x_axis.set_major_formatter(mdates.DateFormatter("%b %Y"))

    # Only the x axis carries information: hide y ticks and all spines.
    hidden_artists = ax.get_yticklabels() + ax.get_yticklines() + list(ax.spines.values())
    plt.setp(hidden_artists, visible=False)

    for month_start in month_starts:
        ax.scatter(month_start, 0, s=30, facecolor='#e74c3c', edgecolor='k')
    ax.grid(False)
    plt.xticks(rotation=90)

    # Pad the x range: first day of the month before the first marker to the
    # first day of the month after the last marker.
    day_before = month_starts.min().replace(day=1) - timedelta(days=1)
    month_after = month_starts.max().replace(day=1) + timedelta(days=31)
    left = datetime(day_before.year, day_before.month, 1)
    right = datetime(month_after.year, month_after.month, 1)
    ax.set_xlim([left, right])


Timeline_plot(start, end)
We collect the data from Yahoo! Finance and compute moving averages over 10-, 20- and 60-day windows.
def Get_Data(Inp):
    """Download the price history of one ticker and append its moving averages.

    Parameters
    ----------
    Inp : str
        Ticker symbol passed to the 'yahoo' reader of pandas_datareader.

    Returns
    -------
    DataFrame
        The downloaded frame for the module-level `start`..`end` window, with
        a leading 'Symbol' column and one 'Moving Ave. <n> days' column per
        window (10, 20, 60) computed from 'Adj Close'.
    """
    windows = (10, 20, 60)
    prices = pdr.DataReader(Inp, 'yahoo', start, end)
    prices.insert(0, 'Symbol', Inp)
    adj_close = prices['Adj Close']
    for window in windows:
        label = "Moving Ave. %s days" % window
        prices[label] = adj_close.rolling(window=window, center=False).mean()
    return prices
# Download every ticker, then stack the per-ticker frames with ONE concat.
# Concatenating inside the loop re-copies the accumulated frame on every
# iteration; collecting the pieces in a list avoids that.
Progress_Bar = progressbar.ProgressBar(maxval=len(Stock_list),
                                       widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
Progress_Bar.start()
Frames = []
# Counting from 1 over the full list lets the bar cover the first ticker too.
for Counter, i in enumerate(Stock_list, start=1):
    Frames.append(Get_Data(i))
    Progress_Bar.update(Counter)
Progress_Bar.finish()
# Long-format table: the rows of each ticker follow each other in Stock_list order.
Data = pd.concat(Frames)
del Frames
Displaying today's data only:
# Keep only the rows stamped with the last index value found in Data.
Latest_Date = Data.index[-1]
Today = Data[Data.index == Latest_Date].reset_index(drop = True)
# NOTE(review): Styler.hide_index() appears to be deprecated in newer pandas
# in favour of Styler.hide(axis="index") - confirm against the target version.
Today.style.hide_index()
# Row(s) whose volume equals the maximum; the first one is reported.
Top_Volume = Today[Today.Volume == Today.Volume.max()]
print('Currently, the stock with the highest volume is %s' % Stock_Dic[Top_Volume.Symbol.values[0]])
Consider AMD for example. We have,
def Disp_Data(Inp, df=None):
    """Return the rows of one ticker, without the 'Symbol' column.

    Parameters
    ----------
    Inp : str
        Ticker symbol to select.
    df : DataFrame, optional
        Long-format table with a 'Symbol' column. Defaults to the module-level
        `Data`, looked up at call time so that a rebuilt `Data` is picked up
        (a DataFrame used directly as the default would be frozen at
        definition time).

    Returns
    -------
    DataFrame
        Rows where ``df.Symbol == Inp``, with the 'Symbol' column dropped.
    """
    if df is None:
        df = Data
    Out = df[df.Symbol == Inp].drop(columns=['Symbol'])
    return Out
# Select AMD's rows (Disp_Data drops the Symbol column).
Temp = Disp_Data('AMD')
# Summary statistics of the remaining columns.
Temp.describe()
# First rows, without the columns that hold a missing value in those rows.
Temp.head().dropna(axis = 1)
# Last rows.
Temp.tail()
Let's plot the adjusted closing price of every stock under study.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 12))
# One line per ticker, labelled with the company name.
for Symbol in Stock_list:
    Adj_Close = Data.loc[Data.Symbol == Symbol, 'Adj Close']
    Adj_Close.plot(ax=ax, label=Stock_Dic[Symbol])
# Legend placed to the right of the axes.
ax.legend(loc='upper center', bbox_to_anchor=(1.2, 0.9), shadow=True, ncol=1, fontsize=12)
# NOTE(review): the automatic upper limit is read into Temp but the axis is
# then clamped to a fixed 0-350 range - confirm the clamp is intended.
_, Temp = ax.get_ylim()
ax.set_ylim([0, 350])
We can create some new functions that can be beneficial for applications as well.
def List_Search(Inp, key):
    """Return the entries of `Inp` that contain `key` as a substring.

    Parameters
    ----------
    Inp : list of str
        Strings to search.
    key : str
        Substring to look for.

    Returns
    -------
    list of str
        Matching entries, in their original order.
    """
    return [entry for entry in Inp if entry.find(key) != -1]
def List_Diff(Inp_A, Inp_B):
    """Return the distinct elements of `Inp_A` that are not in `Inp_B`.

    A plain ``set(Inp_A) - set(Inp_B)`` returns the elements in an arbitrary
    order that can change from run to run. This version keeps the order of
    first appearance in `Inp_A`, so results (and anything built from them,
    such as column orders) are reproducible.

    Parameters
    ----------
    Inp_A : list
        Elements to keep (hashable).
    Inp_B : list
        Elements to remove (hashable).

    Returns
    -------
    list
        Unique elements of `Inp_A` absent from `Inp_B`, in `Inp_A` order.
    """
    excluded = set(Inp_B)
    # dict.fromkeys removes duplicates while preserving insertion order.
    return [item for item in dict.fromkeys(Inp_A) if item not in excluded]
# Columns to average: everything except the moving averages and the symbol.
Columns = List_Diff(Data.columns.tolist(), List_Search(Data.columns.tolist(), 'Moving Ave'))
Columns = List_Diff(Columns, ['Symbol'])
# Per-ticker mean of every selected column, computed in a single grouped pass.
# Compared with pre-filling the columns with '' and assigning row by row, the
# result columns keep a numeric dtype, and the pass is fast enough that no
# progress bar is needed.
Ave_df = (Data.groupby('Symbol')[Columns].mean()
          .reindex(Stock_list)      # rows in Stock_list order, one per ticker
          .add_prefix('Ave ')       # 'Volume' -> 'Ave Volume', ...
          .rename_axis('Symbol')
          .reset_index())           # 'Symbol' becomes the first column
The following table shows the average values for all columns of the Data.
# NOTE(review): Styler.hide_index() appears to be deprecated in newer pandas
# in favour of Styler.hide(axis="index") - confirm against the target version.
Ave_df.style.hide_index()
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(18, 14), sharex=False)
Upper, Lower = ax
# Upper panel: average volume only.
Ave_df.plot.bar(x='Symbol', y='Ave Volume', rot=90, ax=Upper, legend=False, color='#34495e', edgecolor='k')
# Lower panel: every other average, grouped per ticker.
Temp = Ave_df.drop(columns=['Ave Volume'])
Temp.plot.bar(x='Symbol', rot=90, ax=Lower, legend=True, edgecolor='k')
# Legend below the lower panel, one entry per plotted column (all but Symbol).
Legend_Columns = len(Temp.columns) - 1
Lower.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), shadow=True, ncol=Legend_Columns, fontsize=12)
# Spacing between the two panels.
plt.subplots_adjust(hspace=0.3, wspace=0.2)
Let's plot moving averages for 10, 20 and 60 day periods of time for the top 4 companies with the highest volume on average.
def TopN_volumes(N, df=None):
    """Return the first-column values of the `N` rows with the largest 'Ave Volume'.

    Parameters
    ----------
    N : int
        Number of rows to return.
    df : DataFrame, optional
        Table with an 'Ave Volume' column; the first column is what gets
        returned (the ticker symbol in `Ave_df`). Defaults to the module-level
        `Ave_df`, looked up at call time so that a rebuilt `Ave_df` is picked
        up (a DataFrame used directly as the default would be frozen at
        definition time).

    Returns
    -------
    list
        First-column values, ordered by decreasing 'Ave Volume'.
    """
    if df is None:
        df = Ave_df
    Out = df.sort_values(by='Ave Volume', ascending=False).iloc[:N, 0].tolist()
    return Out
# Columns to draw: the adjusted close plus every moving average, sorted by name.
# (This setup used to be written out twice in a row; it is done once here.)
Columns = List_Search(Data.columns.tolist(), 'Moving Ave')
Columns.append("Adj Close")
Columns = sorted(Columns)
# The N tickers with the highest average volume.
N = 4
mylist = TopN_volumes(N)
# Two panels per row. squeeze=False keeps `ax` two-dimensional even when there
# is a single row, so the same code works for N <= 2.
Rows = math.ceil(N/2)
fig, ax = plt.subplots(nrows = Rows, ncols = 2, figsize=(16, 6*Rows), squeeze=False)
# ax.flat walks the grid row by row.
for Counter, Panel in enumerate(ax.flat):
    if Counter >= len(mylist):
        # Odd N (or fewer tickers than requested): hide the unused panel
        # instead of indexing past the end of mylist.
        Panel.set_visible(False)
        continue
    Disp_Data(mylist[Counter])[Columns].plot(ax=Panel, legend = True)
    _ = Panel.set_title(Stock_Dic[mylist[Counter]])
    _ = Panel.legend(loc='upper center', bbox_to_anchor=(0.5, -0.25), shadow=True, ncol=2, fontsize=12)
plt.subplots_adjust(hspace=0.6, wspace=0.2)
Daily return can be calculated using the percentage change of the adjusted closing price.
fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 10))
# Daily return = percentage change of the adjusted close, one line per ticker.
for Symbol in mylist:
    Disp_Data(Symbol)['Adj Close'].pct_change().plot(ax=ax, label=Stock_Dic[Symbol])
_ = ax.legend(loc='upper center', bbox_to_anchor=(1.15, 0.9), shadow=True, ncol=1, fontsize=12)
# Round both y limits to one significant digit. The builtin float is used
# because the np.float alias no longer exists in current NumPy releases.
Temp = ax.get_ylim()
_ = ax.set_ylim([float(format(Temp[0], '.0e')), float(format(Temp[1], '.0e'))])
_ = ax.set_title('Daily Return', fontsize=14)
First, we need to create a new data frame by reading the Adj Close column from all stock data under study. We have,
# Read all tickers with a single DataReader call and keep only the
# 'Adj Close' part of the result.
All_data = pdr.DataReader(Stock_list, 'yahoo', start, end)['Adj Close']
# First and last rows, for a quick look.
All_data.head()
All_data.tail()
The returns can be analyzed using the percentage change from the adj Close.
# Row-over-row percentage change of every column; the first row has no
# predecessor and is therefore NaN.
All_returns = All_data.pct_change()
All_returns.tail()
fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 10))
All_returns.plot(marker='*', legend=True, ax=ax)
_ = ax.set_ylim([-0.3 , 0.3])
_ = ax.set_title('Daily Returns', fontsize=14)
# Build the legend labels from the columns that were actually plotted, so each
# line gets its own company name even if the column order differs from the
# order of Stock_Dic or a ticker is missing. Unknown tickers keep their symbol.
Labels = [Stock_Dic.get(Symbol, Symbol) for Symbol in All_returns.columns]
_ = ax.legend(Labels, loc='upper center', bbox_to_anchor=(1.2, 0.9), shadow=True, ncol=1, fontsize=12)
The following graphs show the correlation between different stocks.
# Scatter + regression line of the daily returns of two tickers.
# x / y / data are passed by keyword and the figure size through `height`:
# recent seaborn versions no longer take them positionally, and `size` was
# renamed to `height`.
_ = sns.jointplot(x='AAPL', y='MSFT', data=All_returns, kind='reg', space=0, height=6, ratio=4)
_ = sns.jointplot(x='AAPL', y='AMD', data=All_returns, kind='reg', space=0, height=6, ratio=4)
Now, we can use the pairplot tool to visualize all.
# Daily returns of the eight tickers with the highest average volume,
# restricted to the dates on which none of them is missing.
Top_Symbols = TopN_volumes(8, df = Ave_df)
Temp = All_returns[Top_Symbols].dropna()
# Pairwise scatter plots with kernel-density estimates on the diagonal.
_ = sns.pairplot(Temp, diag_kind='kde')
Nonetheless, the correlation matrix and plot are always convenient to see numerical values for correlations.
# Pairwise correlation of the selected return series.
Cor_matrix = Temp.corr()
Cor_matrix


def Correlation_Plot (Df,Fig_Size):
    """Draw the lower triangle (diagonal included) of the correlation matrix of `Df`.

    Parameters
    ----------
    Df : DataFrame
        Data whose column-wise correlations are plotted.
    Fig_Size : number
        Width and height of the square figure, in inches.
    """
    corr = Df.corr()
    # Mask the cells strictly above the diagonal; 1 means "hidden".
    hidden = np.triu(np.ones(corr.shape), k=1)
    Fig, ax = plt.subplots(figsize=(Fig_Size, Fig_Size))
    palette = sns.color_palette("RdBu", n_colors=10)
    sns.heatmap(corr, ax=ax, mask=hidden, annot=True, square=True, cmap=palette,
                linewidths=0.2, vmin=0, vmax=1, cbar_kws={"shrink": .5})
    # Widen the y range by half a cell on both ends.
    # NOTE(review): presumably a workaround for heatmap rows being cut off in
    # some matplotlib releases - confirm it is still needed.
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)


Correlation_Plot (Temp, 8)
Here, darker shades of blue represent a higher correlation.
def Risk_Plot(data):
    """Scatter the mean of each column against its standard deviation.

    Parameters
    ----------
    data : DataFrame
        One column of returns per stock.

    Returns
    -------
    tuple
        The x-axis limits of the finished plot.
    """
    fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 6))
    expected_return = data.mean()
    risk = data.std()
    ax.scatter(expected_return, risk, s=25, color = "#e74c3c")
    ax.set_xlabel('Expected Return')
    ax.set_ylabel('Risk')
    # Label every point with its column name, arrow pointing at the point.
    arrow_style = dict(facecolor="#9b59b6", shrink=0.001)
    for name, x_pos, y_pos in zip(data.columns, expected_return, risk):
        ax.annotate(name, xy=(x_pos, y_pos), xytext=(-50, 0), textcoords = 'offset points',
                    ha = 'right', va = 'bottom', arrowprops=arrow_style)
    plt.autoscale(enable=True, axis='both', tight=True)
    return ax.get_xlim()


Temp = Risk_Plot(All_returns)
print("""The current trend seems to output a value between %.2e and %.2e.
We would like to identify a stock with high return and low risk!
""" % Temp)
Let's find the quantile for a stock.
# 5% empirical quantile of AMD's daily returns (a fraction, normally negative).
qt = All_returns['AMD'].quantile(0.05)
# The same quantile as a positive percentage.
qt_pct = abs(qt)*100
print('Quantile Percentage: %0.4f' % qt_pct)
# Both placeholders used to point at argument 0, so the raw fraction was
# printed twice and labelled as a percentage. The first value is now the
# quantile in percent, the second one the corresponding loss bound.
print("""The 0.05 empirical quantile of daily returns is at {0:.2f}%.
This means that with 95% confidence, the worst daily loss will not exceed {1:.2f}% (of the investment)."""
.format(qt*100, qt_pct))
To predict future behaviors, we can implement the Monte Carlo method (also see this link and this link).
# Number of simulation steps: one per calendar day of a year.
days = 365
# Step size: one step is 1/365 of the horizon.
dt = 1/days
Defining a Monte Carlo function for the stock price.
def stock_monte_carlo(start_price, days, mu, sigma):
    """Simulate one random price path.

    Parameters
    ----------
    start_price : float
        Price at step 0.
    days : int
        Number of entries in the returned path.
    mu, sigma : float
        Mean and standard deviation of the returns, scaled inside the function
        by the module-level step size `dt`.

    Returns
    -------
    numpy.ndarray
        Array of length `days` whose first entry is `start_price`.
    """
    path = np.zeros(days)
    path[0] = start_price
    # Both quantities are the same for every step.
    step_drift = mu * dt
    step_scale = sigma * np.sqrt(dt)
    for step in range(1, days):
        # NOTE(review): the shock is already centred on mu*dt and the drift
        # adds mu*dt again - confirm the doubled drift is intended.
        step_shock = np.random.normal(loc=step_drift, scale=step_scale)
        previous = path[step - 1]
        # New price = old price + old price * (drift + shock)
        path[step] = previous + (previous * (step_drift + step_shock))
    return path
def Monte_Carlo_Analysis(Inp, mu, sigma, N=1e2, days = days):
    """Plot `N` simulated price paths for one ticker.

    Parameters
    ----------
    Inp : str
        Ticker symbol; must be a key of `Stock_Dic` and present in `Data`.
    mu, sigma : float
        Mean and standard deviation passed on to `stock_monte_carlo`.
    N : number
        Number of paths to draw (converted with ``int``). The loop previously
        ran a fixed 100 times and ignored this argument.
    days : int
        Number of steps per path.

    Returns
    -------
    DataFrame
        The price data of `Inp` as returned by `Disp_Data`.
    """
    df = Disp_Data(Inp)
    # Start from the most recent 'Open'. .iloc makes the positional lookup
    # explicit; a bare [-1] on a date-indexed Series depends on a fallback
    # from labels to positions.
    start_price = df['Open'].iloc[-1]
    N = int(N)
    fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 8))
    for _ in range(N):
        ax.plot(stock_monte_carlo(start_price, days, mu, sigma))
    ax.set_xlabel('Days')
    ax.set_ylabel('Price')
    ax.set_title('Monte Carlo Analysis for %s' % Stock_Dic[Inp])
    ax.set_xlim([0, days])
    return df
def Final_price_distribution_simulations(Inp, mu, sigma, N= 1e4, days = days):
    """Simulate `N` price paths and collect the final price of each.

    Parameters
    ----------
    Inp : str
        Ticker symbol present in `Data`.
    mu, sigma : float
        Mean and standard deviation passed on to `stock_monte_carlo`.
    N : number
        Number of simulations (converted with ``int``).
    days : int
        Number of steps per path; the value at index ``days - 1`` is kept.

    Returns
    -------
    numpy.ndarray
        Array of length `N` holding the last price of every simulated path.
    """
    df = Disp_Data(Inp)
    # Start from the most recent 'Open'. .iloc makes the positional lookup
    # explicit; a bare [-1] on a date-indexed Series depends on a fallback
    # from labels to positions.
    start_price = df['Open'].iloc[-1]
    N = int(N)
    simulations = np.zeros(N)
    Progress_Bar = progressbar.ProgressBar(maxval= N, widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
    Progress_Bar.start()
    for i in range(N):
        simulations[i] = stock_monte_carlo(start_price, days, mu, sigma)[days-1]
        # i + 1 simulations are finished at this point.
        Progress_Bar.update(i + 1)
    Progress_Bar.finish()
    return simulations
def Final_price_distribution_plot(simulations, Inp):
    """Plot a histogram of simulated final prices with the 1% quantile marked.

    Parameters
    ----------
    simulations : numpy.ndarray
        Final prices, e.g. from `Final_price_distribution_simulations`.
    Inp : str
        Ticker symbol; must be a key of `Stock_Dic` and present in `Data`.
    """
    df = Disp_Data(Inp)
    # Start from the most recent 'Open'. .iloc makes the positional lookup
    # explicit; a bare [-1] on a date-indexed Series depends on a fallback
    # from labels to positions.
    start_price = df['Open'].iloc[-1]
    fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 8))
    # 1st percentile of the final prices: 99% of the simulations end above it.
    q = np.percentile(simulations, 1)
    _ = ax.hist(simulations, bins='auto', color = '#34495e')
    _ = plt.figtext(0.75, 0.80, "Start price: $%.2f" % start_price, fontsize = 12)
    _ = plt.figtext(0.75, 0.75, "Mean final price: $%.2f" % simulations.mean(), fontsize = 12)
    # Value at risk: distance between the start price and the 1% quantile.
    _ = plt.figtext(0.75, 0.70, "VaR(0.99): $%.2f" % (start_price -q,), fontsize = 12)
    _ = plt.figtext(0.15,0.665, "q(0.99): $%.2f" % q, fontsize = 12)
    # Called without arguments, so the current limits are kept.
    # NOTE(review): this presumably also stops further autoscaling - confirm.
    _ = ax.set_xlim()
    _ = ax.axvline(x=q, linewidth=4, color='#e74c3c')
    # `days` here is the module-level number of simulated steps.
    _ = ax.set_title("Final price distribution for %s after %s days" % (Stock_Dic[Inp], days), weight='bold')
# Ticker analysed in this and the next cell.
Stock = 'AMD'
# Mean of the ticker's daily returns
mu = All_returns.mean()[Stock]
# Standard deviation of the ticker's daily returns
sigma = All_returns.std()[Stock]
# Plot simulated price paths (default number of runs and steps)
Monte_Carlo_Analysis(Stock, mu = mu, sigma = sigma)
The frequencies of the simulated outcomes form a bell curve. The most likely return is in the middle of the curve, which means there is an equal chance that the actual return will be higher or lower than that value.
# Final prices of the simulated paths for the ticker selected above
# (default number of simulations), followed by their histogram.
Simulations = Final_price_distribution_simulations(Stock, mu = mu, sigma = sigma)
Final_price_distribution_plot(Simulations, Stock)
See more details about Value at Risk (VaR) here.
# Same analysis for Apple.
Stock = 'AAPL'
# Mean of the ticker's daily returns
mu = All_returns.mean()[Stock]
# Standard deviation of the ticker's daily returns
sigma = All_returns.std()[Stock]
# Simulated price paths
Monte_Carlo_Analysis(Stock, mu = mu, sigma = sigma)
# Final prices of the simulated paths and their histogram
Simulations = Final_price_distribution_simulations(Stock, mu = mu, sigma = sigma)
Final_price_distribution_plot(Simulations, Stock)
# Same analysis for Microsoft. The ticker has to be stored in `Stock`, the
# name every line below reads; assigning it to another variable left `Stock`
# at its previous value and re-ran the previous ticker instead.
Stock = 'MSFT'
# Mean of the ticker's daily returns
mu = All_returns.mean()[Stock]
# Standard deviation of the ticker's daily returns
sigma = All_returns.std()[Stock]
# Simulated price paths
Monte_Carlo_Analysis(Stock, mu = mu, sigma = sigma)
# Final prices of the simulated paths and their histogram
Simulations = Final_price_distribution_simulations(Stock, mu = mu, sigma = sigma)
Final_price_distribution_plot(Simulations, Stock)